home *** CD-ROM | disk | FTP | other *** search
Text File | 1997-06-27 | 11.7 KB | 599 lines | [TEXT/CWIE] |
- /*------------------------------------------------------------------------------
- #
- # NewsTicker, my Hack for 1997
- #
- # HTMLExtractor.cp - Base class to read an HTML page in, and parse
- # out the interesting stuff. Useless on its own,
- # only exists to be derived..
- #
- ------------------------------------------------------------------------------*/
- #include <Threads.h>
- #include <strings.h>
-
- #include "HTMLExtractor.h"
- #include "SubWooferEndPoint.h"
- #include "HTTPEndPoint.h"
- #include "Idler.h"
- #include "TickerGlobals.h" //get our structures and all
- #include "TickerWindowHandler.h"
- #include "BeachBall.h"
-
- #include <string.h>
-
- BeachBall* gTheBall = nil;		// shared cursor spinner: created on first use by ReadEntries/ReadLastModified, spun from TickerIdler::YieldAction
-
- class TickerIdler : public Idler
- {
- private:
- long mlWNEDelay;
- HTMLExtractor* mftheExtractor;
-
- protected:
- TickerIdler (const TickerIdler& oRHS);
- TickerIdler& operator= (const TickerIdler& oRHS);
-
- public:
- TickerIdler (HTMLExtractor* theExtractor);
- virtual void YieldAction (void);
- virtual ~TickerIdler (void) { }
- };
- //
- // The idler below does many things while waiting for data to be sent to/received from
- // the Internet. It spins a beachball, recognizes when we move to/from the background,
- // and scrolls our window below us. It also recognizes command-period or closing the
- // window to abort a read/write.
- //
- #define kDefaultWNEDelay 15
- #define kDefaultIdlerPeriod 15
-
- TickerIdler::TickerIdler (HTMLExtractor* theExtractor) //default constructor. Tell the Idler base class how often to call yield action
- {
- SetPeriod (kDefaultWNEDelay);
-
- mlWNEDelay = kDefaultIdlerPeriod;
- mftheExtractor = theExtractor;
- }
-
- void TickerIdler::YieldAction (void)
- {
- EventRecord sEvent;
-
- JustHandleWindow();
-
- #ifdef USESUBWOOFER
- YieldToAnyThread();
- #endif
-
- if ((gTheBall!=nil)&&(!gInBackground))
- gTheBall->Idle(); //spin our beach ball cursor
-
- if (!gDoneFlag)
- {
- if (mftheExtractor)
- mftheExtractor->Cancel();
- }
-
- if (WaitNextEvent(everyEvent , &sEvent, mlWNEDelay, nil))
- {
- switch (sEvent.what)
- {
- case kHighLevelEvent:
- AEProcessAppleEvent( &sEvent ) ;
- break;
- case keyDown:
- if (((sEvent.message & charCodeMask)=='.')&&(sEvent.modifiers & cmdKey))
- {
- if (mftheExtractor)
- mftheExtractor->Cancel();
- }
- break;
- case osEvt:
- if (((sEvent.message >> 24) & 0x0FF) == kSuspendResumeMessage) /* high byte of message */
- {
- gInBackground = (sEvent.message & kResumeMask) == 0;
- }
- }
- }
- }
-
- HTMLExtractor::HTMLExtractor (char* theaddress, short theIconID, sMyDataPtr theDataPtr)
- {
- #ifdef USESUBWOOFER
- mfWebPipe = nil;
- #else
- mfHTTPPipe = nil;
- #endif
- mfDoingARead = false;
- mfTheDataPtr = theDataPtr;
- mfLastModified[0] = 0;
- mfIconID = theIconID;
-
- strcpy(mfAddress, theaddress);
- }
-
- HTMLExtractor::~HTMLExtractor (void)
- {
- #ifdef USESUBWOOFER
- if (mfWebPipe)
- {
- delete mfWebPipe; mfWebPipe = nil;
- }
- #else
- if (mfHTTPPipe)
- {
- delete mfHTTPPipe; mfHTTPPipe = nil;
- }
- #endif
- }
-
- void HTMLExtractor::AddEntry(Str255 theSubject, Str255 theURL)
- {
- if (gThePrefs.JustShowFirstThree&&(mfTempHeadlineCount>=3)) //demo mode
- {
- mfDoingARead = false;
- return;
- }
-
- if (mfTempHeadlineCount<tempmaxHeadlines)
- {
- PLstrcpy(mfTempHeadlines[mfTempHeadlineCount].Subject, theSubject);
- PLstrcpy(mfTempHeadlines[mfTempHeadlineCount].URL, theURL);
- mfTempHeadlines[mfTempHeadlineCount].cicnResID = mfIconID;
-
- mfTempHeadlineCount++;
- }
- }
-
- // Called by base app to read all entries in, or check header and see if it's changed
- void HTMLExtractor::ReadEntries (void)
- {
- short index;
- short destindex;
- TickerIdler* theidler = new TickerIdler(this);
- Ptr thebuffer;
- long buffersize;
- OSErr io;
-
- mfDoingARead = true;
- mfReadingHeader = true;
- thetextsize = 0;
- thetagsize = 0;
- AmOnTag = false;
- mfTempHeadlineCount = 0;
-
- if (!gTheBall)
- gTheBall = new BeachBall();
-
- // Use the subwoofer code
-
- #ifdef USESUBWOOFER
- mfReadingHeader = false; //we don't get headers from Subwoofer
- if (mfWebPipe)
- {
- delete mfWebPipe; mfWebPipe = nil;
- }
-
- mfWebPipe = new SubWooferEndPoint(this);
- if (mfWebPipe->StartGettingFile(mfAddress, 80, theidler)!=noErr)
- {
- delete mfWebPipe; mfWebPipe = nil;
- delete theidler;
- return;
- }
- do
- {
- mfWebPipe->DoIdle();
- theidler->YieldAction();
- }
- while (mfDoingARead);
-
- io = mfWebPipe->GetSubWoofHeader(mfLastModified);
-
-
- delete mfWebPipe;
- mfWebPipe = nil;
- #else
- //
- // Use the raw OT stuff
- if (mfHTTPPipe)
- {
- delete mfHTTPPipe; mfHTTPPipe = nil;
- }
-
- mfHTTPPipe = new HTTPEndPoint(this);
- if (mfHTTPPipe->StartGettingFile(mfAddress, 80, theidler)!=noErr)
- {
- delete mfHTTPPipe; mfHTTPPipe = nil;
- delete theidler;
- return;
- }
- do
- {
- mfHTTPPipe->DoIdle();
- theidler->YieldAction();
- }
- while (mfDoingARead);
-
- delete mfHTTPPipe;
- mfHTTPPipe = nil;
- #endif
- delete theidler;
-
- // Delete all entries with cicnResID = mfIconID
- destindex = 0;
- for (index = 0; index < mfTheDataPtr->MsgCount; index++)
- {
- if (mfTheDataPtr->theHeadlines[index].cicnResID!=mfIconID) //don't delete it
- {
- if (index!=destindex) //copy down if we need to
- {
- mfTheDataPtr->theHeadlines[destindex] = mfTheDataPtr->theHeadlines[index];
- }
- destindex++;
- }
- }
- mfTheDataPtr->MsgCount = destindex;
-
- // Now copy the entries we accumulated out
- for (index = 0; index<mfTempHeadlineCount; index++) // copy the entries off
- {
- if (mfTheDataPtr->MsgCount<maxHeadlines)
- {
- mfTheDataPtr->theHeadlines[mfTheDataPtr->MsgCount] = mfTempHeadlines[index];
- mfTheDataPtr->MsgCount++;
- }
- }
- }
-
- // Called by endpoint as it gets strings
- void HTMLExtractor::ReceiveString (char* string, short numchars)
- {
- short index;
- char thechar;
-
- if (mfReadingHeader)
- {
- if (numchars <= 2) //must be crlf
- mfReadingHeader = false;
- else
- {
- //if Last-modifed line, save it
- if (MyCompareStr(string, "Last-Modified:"))
- {
- if (numchars>31)
- numchars = 31;
- mfLastModified[0] = numchars;
- BlockMove(string, &mfLastModified[1], numchars);
- }
- }
- }
- else
- {
- for (index = 0; index<numchars; index++)
- {
- thechar = string[index];
- if ((thechar==0x0d)||(thechar==0x0a)||(thechar==0x09))//make carriage returns and line feeds spaces
- thechar = ' ';
- if (AmOnTag)
- {
- if ((thetagsize<2047)&&((thetagsize>0)||(thechar!=' '))) //add this character to the tag
- {
- thetag[thetagsize] = thechar; thetagsize++;
- }
- if (thechar=='>') //end of tag?
- {
- thetag[thetagsize] = 0; //make it a nice C string
- HandleToken(thetag, thetagsize, true); //and handle it
- thetextsize = 0; //And star getting text
- AmOnTag = false;
- }
- }
- else
- {
- if (thechar=='<') //start of tag?
- {
- if (thetextsize>0) //any text to handle?
- {
- thetext[thetextsize] = 0;
- HandleToken(thetext, thetextsize, false); //handle the text
- }
- thetag[0] = thechar; //put this in the tag and start parsing it
- thetagsize = 1;
- AmOnTag = true;
- }
- else //nope, just add to the text
- {
- if ((thetextsize<2047)&&((thetextsize>0)||(thechar!=' ')))
- {
- thetext[thetextsize] = thechar; thetextsize++;
- }
- }
- }
- }
- }
- }
-
- void HTMLExtractor::HandleToken(char* string, short numchars, Boolean isCommand)
- {
-
- }
-
- void HTMLExtractor::Disconnect(void)
- {
- mfDoingARead = false;
- }
-
- // Cancel the connection
- //
- void HTMLExtractor::Cancel(void)
- {
- mfDoingARead = false;
- }
-
- // Called by base app to read the header in
- void HTMLExtractor::ReadLastModified(void)
- {
- TickerIdler* theidler = new TickerIdler(this);
- Ptr thebuffer;
- long buffersize;
- OSErr io;
-
- mfDoingARead = true;
- mfReadingHeader = true;
- thetextsize = 0;
- thetagsize = 0;
- AmOnTag = false;
- mfTempHeadlineCount = 0;
-
- if (!gTheBall)
- gTheBall = new BeachBall();
-
-
- // Use the subwoofer code
-
- #ifdef USESUBWOOFER
- if (mfWebPipe)
- {
- delete mfWebPipe; mfWebPipe = nil;
- }
-
- mfWebPipe = new SubWooferEndPoint(this);
- if (mfWebPipe->StartGettingHeader(mfAddress, 80, theidler)!=noErr)
- {
- delete mfWebPipe; mfWebPipe = nil;
- delete theidler;
- return;
- }
- do
- {
- mfWebPipe->DoIdle();
- theidler->YieldAction();
- }
- while (mfDoingARead);
- io = mfWebPipe->GetSubWoofHeader(mfLastModified);
-
- delete mfWebPipe;
- mfWebPipe = nil;
- #else
- //
- // Use the raw OT stuff
- if (mfHTTPPipe)
- {
- delete mfHTTPPipe; mfHTTPPipe = nil;
- }
-
- mfHTTPPipe = new HTTPEndPoint(this);
- if (mfHTTPPipe->StartGettingHeader(mfAddress, 80, theidler)!=noErr)
- {
- delete mfHTTPPipe; mfHTTPPipe = nil;
- delete theidler;
- return;
- }
- do
- {
- mfHTTPPipe->DoIdle();
- theidler->YieldAction();
- }
- while (mfDoingARead);
-
- delete mfHTTPPipe;
- mfHTTPPipe = nil;
- #endif
- delete theidler;
-
- }
-
- void HTMLExtractor::GetLastModified (Str31 LastModStr)
- {
- PLstrcpy(LastModStr, mfLastModified);
- }
-
- //
- // Here is some standard code to help parse the HTML
- //
//
// Return a pointer to the first character at or after pcSrc that is not a space,
// carriage return or line feed. The terminating zero stops the scan too.
//
static char* SkipWhiteChars(char* pcSrc)
{
    for (;;)
    {
        switch (*pcSrc)
        {
            case ' ':
            case '\r':
            case '\n':
                pcSrc++;
                break;

            default:                    //anything else, including the end of the string
                return pcSrc;
        }
    }
}
-
- static char* SkipWhiteCharsAndEqual(char* pcSrc)
- {
- pcSrc = SkipWhiteChars(pcSrc);
-
- if (*pcSrc == '=')
- pcSrc++;
- pcSrc = SkipWhiteChars(pcSrc);
-
- return pcSrc;
- }
-
- Boolean MyCompareStr(char* p1, char* p2)
- {
- short thelength = strlen(p2);
-
- return (IdenticalText (p1, p2, thelength, thelength, nil)==0);
- }
-
- //
- // Look for some quoted data for a given marker
- //
- void FindATag(char* tag, char* theLink, char* theMarker)
- {
- char* cp;
-
- cp = theLink;
- *cp = 0;
-
- do
- {
- if (*tag != ' ')
- return;
- tag++;
-
- //DebugStr("\pPreparing to get the tag");
- tag = ::SkipWhiteChars(tag);
- if (::MyCompareStr(tag, theMarker))
- {
- tag += sizeof(theMarker);
- tag = ::SkipWhiteCharsAndEqual(tag);
-
- if (*tag != '"')
- return;
- tag++;
-
- if (*tag == '#') //A navigation on same page link
- return;
-
- while ((*tag != 0) && (*tag != '"'))
- {
- if (*tag=='?') //restart, this was apple funkiness
- {
- cp = theLink;
- tag++;
- }
- else if (*tag=='$') //another part of funkiness, this isn't a good link
- {
- *theLink = 0;
- return;
- }
- else *(cp++) = *(tag++);
- }
- *cp = 0; //mark the end
- return;
- }
- else
- {
- do //Skip this item. Get past the marker
- {
- tag++;
- }
- while ((*tag!=0) && (*tag!='='));
-
- tag++; //skip the =
- if (*tag=='"')
- {
- tag++;
- do //Skip the quoted data
- {
- tag++;
- }
- while ((*tag!=0) && (*tag!='"'));
- }
-
- do //Skip the data, waiting for a space
- {
- tag++;
- }
- while ((*tag!=0) && (*tag!=' ') && (*tag!='>'));
- }
- }
- while ((*tag!='>')&&(*tag!=0));
- }
-
- static void SaveHRef(char* tag, char* HTMLLink)
- {
- char* cp;
- //See if it's A HREF="
- cp = HTMLLink;
- *cp = 0;
-
- if (*tag != ' ')
- return;
- tag++;
-
- tag = ::SkipWhiteChars(tag);
- if (!::MyCompareStr(tag, "HREF"))
- return;
- tag += 4;
- tag = ::SkipWhiteCharsAndEqual(tag);
-
- if (*tag != '"')
- return;
- tag++;
-
- if (*tag == '#') //A navigation on same page link
- return;
-
- while ((*tag != 0) && (*tag != '"'))
- {
- if (*tag=='?') //restart, this was apple funkiness
- {
- cp = HTMLLink;
- tag++;
- }
- else *(cp++) = *(tag++);
- }
- *cp = 0; //mark the end
- }
-
- static Boolean isFullURL(char* theURL)
- {
- for ( ; *theURL != 0; theURL++)
- if (*theURL == ':')
- return true;
-
- return false;
- }
-
- Boolean HTMLExtractor::ParseGoodURL(char* thestring, Str255 theURL)
- {
- char HTMLLink[256];
- char headerstr[10] = "http://";
- short index;
-
- theURL[0] = 0;
-
- //SaveHRef(thestring, HTMLLink);
-
- FindATag(thestring, HTMLLink, "HREF");
-
- if ((HTMLLink[0]==0)||(HTMLLink[0] == '#'))
- return false;
-
- if (!isFullURL(HTMLLink))
- {
- for (index = 0; headerstr[index]!=0; index++) //http://
- {
- theURL[0]++;theURL[theURL[0]] = headerstr[index];
- }
- for (index = 0; mfAddress[index]!=0; index++) //add our address to it
- {
- theURL[0]++;theURL[theURL[0]] = mfAddress[index];
- }
- theURL[0]++;theURL[theURL[0]] = '/';
- }
-
- index = 0;
- if (HTMLLink[0] == '/')
- index++;
-
- for ( ; HTMLLink[index] != 0; index++)
- {
- theURL[0]++;theURL[theURL[0]] = HTMLLink[index];
- }
-
- return true;
- }
-